/**************************************
These regressions are the estimates displayed in Table A4: Bias By Residency

Author: Andrew Foote
Date: 11/10/2022

**************************************/
#delimit ;
/* Session setup: commands are semicolon-delimited from here on, and output
   is not paused between screens. */
set more off ;

/* config.do is included before the log is opened; the log name below uses
   the global $date (presumably defined there -- confirm against config.do). */
include "./config.do" ;

/* Close any log left open by an earlier run, then start this script's log. */
capture log close ;
log using "./02.07.regs_total_byres_$date.log", replace ;

/* Build a person-level residency flag: one row per pik, with resident set
   to the maximum of resident across that pik's rows in the input file. */
use "$datadir/residency_pikopeid.dta", clear ;

collapse (max) resident, by(pik) ;
sort pik ;

/* Quote the tempfile path so the save also works when the temporary
   directory contains spaces (the later merge already quotes it). */
tempfile res ;
save "`res'", replace ;

/* Load the long-format earnings file and attach the person-level
   residency flag saved in the tempfile. */
use "$datadir/all_earnings_long.dta", clear ;

sort pik ;

merge m:1 pik using "`res'" ;

/* Show the match breakdown, then discard rows that exist only in the
   residency file (_merge == 2); master-only and matched rows are kept. */
tabulate _merge ;
drop if _merge == 2 ;

tabulate resident, missing ;

/* Rows whose resident is missing after the merge (e.g. no residency record
   for the pik) are coded as non-residents. */
replace resident = 0 if resident == . ;

describe ;

tabulate year_grad ;

/*************************************
First, keep only people
who aren't missing demographics.
*************************************/


/* restrictions.do is not shown here; presumably it applies the sample
   restrictions described above -- confirm against that file. */
include "./restrictions.do" ;

/* Natural log of total earnings; non-positive earnings yield missing. */
generate log_earn_total = ln(total_earnings) ;

/* top_school.do is expected to supply top_school, which is tabulated
   below -- NOTE(review): confirm against that file. */
include "./top_school.do" ;

tabulate opeid top_school, missing ;

tabulate ui_state resident, missing ;

/* Person-level coverage check: for each pik, flag whether any of their rows
   shows positive national, in-state, or total earnings, then cross-tabulate
   each flag against resident. The data in memory are restored afterwards. */
preserve ;

gen all = 1 ;

/* In Stata a missing value compares as larger than any number, so a bare
   (x > 0) would flag missing earnings as positive; exclude them explicitly. */
gen national = (national_earnings > 0 & !missing(national_earnings)) ;
gen state    = (instate_earnings  > 0 & !missing(instate_earnings)) ;
gen total    = (total_earnings    > 0 & !missing(total_earnings)) ;

collapse (max) all national state total resident, by(pik) ;

foreach var in all national state total { ;
	tab `var' resident ;
} ;

restore ;

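/*********************************************************
For each residency group and each earnings category
(national, instate, total), run a set of regressions of
the outcome on top_school (controlling for demographics).

Outcomes / specifications:

Earnings (levels)
Earnings (levels), restricted to earnings > 0
Log earnings
p25 of log earnings (quantile regression)
p50 of log earnings (quantile regression)
p75 of log earnings (quantile regression)
**********************************************************/  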

local demogs "male white hispanic black asian inst_state*" ;

/* Make sure every log outcome used below exists. Only log_earn_total is
   generated explicitly in this script; the others are built the same way
   unless they already exist (e.g. from the input data or an included
   do-file). */
foreach category in national instate total { ;
	capture confirm variable log_earn_`category', exact ;
	if _rc != 0 { ;
		gen log_earn_`category' = log(`category'_earnings) ;
	} ;
} ;

/* Helper: run right after an estimation command. Prints the mean of
   top_school and the top / not-top counts within the estimation sample,
   then appends one row to the "support" postfile.
   Arguments: spec code, earnings category, resident value. */
capture program drop _post_support_row ;
program define _post_support_row ;
	args spec category res ;

	sum top_school if e(sample) ;
	local share_top = r(mean) ;
	count if top_school == 1 & e(sample) ;
	local count_top = r(N) ;
	count if top_school == 0 & e(sample) ;
	local count_nottop = r(N) ;

	post support (`spec') ("Controls") ("`category'") (`res') (e(N))
		(`share_top') (`count_top') (`count_nottop')
		(_b[top_school]) (_se[top_school]) ;
end ;

/* Release a handle left open by an earlier run that stopped part-way;
   otherwise postfile fails because "support" is already defined. */
capture postclose support ;

/* Counts are stored as long and estimates as double: the default float type
   cannot represent integers above roughly 16.7 million exactly and would
   truncate the precision of the coefficients. The path is quoted so
   directories containing spaces still resolve. */
postfile support spec str15 comment str8 category res long Nobs double share_topschool
	long count_top long count_nottop double beta double se
	using "$supportdir/support_regs_byres_$date.dta", replace ;

/* Outer loop: resident == 1, then resident == 0.
   Inner loop: earnings category.
   The spec column in the output is 1, 2, 3 for the OLS models below and
   25, 50, 75 for the quantile regressions. */
foreach res in 1 0 { ;

foreach category in national instate total { ;

	/* Spec 1: earnings in levels, observations with year > year_grad. */
	reg `category'_earnings top_school i.year i.year_grad `demogs'
		if year > year_grad & resident == `res', vce(cluster pik) ;
	_post_support_row 1 `category' `res' ;

	/* Spec 2: earnings in levels, additionally restricted to earnings > 0. */
	reg `category'_earnings top_school i.year i.year_grad `demogs'
		if year > year_grad & `category'_earnings > 0 & resident == `res', vce(cluster pik) ;
	_post_support_row 2 `category' `res' ;

	/* Spec 3: log earnings, same sample condition as spec 2. */
	reg log_earn_`category' top_school i.year i.year_grad `demogs'
		if year > year_grad & `category'_earnings > 0 & resident == `res', vce(cluster pik) ;
	_post_support_row 3 `category' `res' ;

	/* Quantile regressions of log earnings at the 25th, 50th and 75th
	   percentiles. Unlike the OLS models, no cluster option is given here. */
	foreach pctile in 25 50 75 { ;
		di "********************************************" ;
		di "***************** QREG: `pctile' *************" ;
		di "*********************************************";
		qreg log_earn_`category' top_school i.year i.year_grad `demogs'
			if year > year_grad & `category'_earnings > 0 & resident == `res', q(`pctile') iterate(1000) ;
		_post_support_row `pctile' `category' `res' ;
	} ;

} ;

} ;



/* Close the postfile, then load the posted results and print them. */
postclose support ;

/* Path quoted, matching the other use statements in this script, so
   directories containing spaces still resolve. */
use "$supportdir/support_regs_byres_$date.dta", clear ;

list ;

                                    
